// Start the session from a clean state before building the dataset.
clear all
// Do not pause long output with --more-- prompts.
set more off

// Project root. It ends with "/", and every path below is written as
// "${path}data/..." or "${path}do_files/...".
global path "~/shared_space/dbaharc/BHOR-YugoslavianRefugees/"

//do "${path}do_files/TradeDatasetConstruction.do"

/*Preparing IVs*/
/*
use "${path}data/data_mig.dta", clear
//keep if year==1975 | year==1980 | year==1990 | year==2000 | year==2010
keep if year==1975 | year==1980 | year==1995
egen countryproduct = group(sitc2 nation)
qui ds mig*, v(32)
local vlist = r(varlist)
foreach v of local vlist {
    ren `v' `v'_
}
reshape wide mig*, i(countryproduct) j(year)
ren nation iso3
ren sitc2 product
replace iso3 = "YUG" if iso3=="FYUG"
tempfile migrants_wide
save `migrants_wide'

use "${path}data/data_mig.dta", clear
ren nation iso3 
replace iso3 = "YUG" if iso3=="FYUG"
ren sitc2 product
tempfile allmig
save `allmig'
*/

/* Yugoslavians random allocation: estimated migrant stocks and their shares */
// Previous input version, kept for reference:
//use "${path}data/data_mig_fyug_est_v207.dta", clear
use "${path}data/data_mig_fyug_est_v300.dta", clear
rename nation nation2
merge m:1 nation2 year using "${path}data/ah100d01_mig_pop_fyug.dta", ///
    keepusing(mig_asyl) keep(master matched) nogenerate

// For every mig_est* variable create a share_est* counterpart:
// the estimate divided by total asylum seekers in that year (mig_asyl).
ds mig_est*, varwidth(32)
foreach migvar in `r(varlist)' {
    // Only the first "mig" in the name is replaced by "share".
    local sharevar : subinstr local migvar "mig" "share"
    generate `sharevar' = `migvar' / mig_asyl
}
drop mig_asyl

// Harmonise identifiers with the trade data: iso3 / product, FYUG -> YUG.
rename nation2 iso3
replace iso3 = "YUG" if iso3 == "FYUG"
rename sitc2 product

// Long (product-year) version, merged into the main panel further below.
tempfile YUGmighat
save `YUGmighat'
/* Wide version: average of the estimated stocks and shares over 1991-1995 */
// Earlier specification used benchmark years instead:
//keep if year==1975 | year==1980 | year==1990 | year==2000 | year==2010
keep if year==1991 | year==1992 | year==1993 | year==1994 | year==1995 
egen countryproduct = group(product iso3)

// Prepare stubs for reshape: drop the first underscore of every mig* and
// share* variable and append a trailing one (e.g. mig_est_x -> migest_x_),
// so the wide names become <stub>_<year>.
qui ds mig* share*, v(32)
// Copy the macro directly instead of evaluating it as a string expression.
local vlist `r(varlist)'
foreach v of local vlist {
    local vv : subinstr local v "_" ""
    ren `v' `vv'_
}
reshape wide mig* share*, i(countryproduct) j(year)

// Recover the list of base names by stripping the year suffixes.
qui ds migest_asyl* shareest_asyl*
local ivlist `r(varlist)'
forvalues y = 1991/1995 {
    local ivlist : subinstr local ivlist "_`y'" "", all
}
local ivlist : list uniq ivlist

foreach var of local ivlist {
    // Avg expected asylum seekers by industry 1991-1995.
    // The pattern is <base>_???? (underscore plus a 4-character year) rather
    // than a bare prefix wildcard: a prefix wildcard would also pick up any
    // other variable whose name merely starts with the same base, including
    // the _9195 averages created in earlier iterations of this loop.
    egen `var'_9195 = rowmean(`var'_????)
}
keep iso3 countryproduct product *_9195
tempfile YUGmighatwide
save `YUGmighatwide'

/* FDI data: Yugoslav totals net of each successor state, by 2-digit product and year */
use "${path}data/fdi_1990_2010.dta", clear
rename sitc2 product_2d
rename nation iso3

foreach successor in SRB HRV SVN MKD BIH {
    // Successor-state FDI on its own rows, 0 everywhere else (and 0 where
    // that FDI is missing) ...
    generate temp = cond(iso3 == "`successor'" & !missing(fdi), fdi, 0)
    // ... spread to every row of the same product-year cell ...
    egen fdi`successor' = max(temp), by(product_2d year)
    // ... and subtracted from the YUG figure.
    // NOTE(review): because temp is 0 on all other rows, max() can never be
    // negative; a negative FDI value for the successor state would be treated
    // as 0 here. Confirm that fdi is non-negative in this file.
    generate fdino`successor' = fdi - fdi`successor' if iso3 == "YUG"
    drop temp fdi`successor'
}

// Only the YUG rows are kept; they are merged on product_2d and year later.
keep if iso3 == "YUG"
tempfile fdicentralbank
save `fdicentralbank'

/* Main trade panel */
use "${path}data/YugoslavianRefugeesTradeDB.dta", clear
// Disabled steps kept for reference:
//drop if iso3 =="DEU"  //We do not need Germany
//merge m:1 iso3 product year using `allmig', keep(master matched) nogen
//merge m:1 iso3 product using `migrants_wide', keep(master matched) nogen

/* Remove country-years whose exports sum to zero across all products */
egen exp_country_year = total(exp), by(countryid year)
drop if exp_country_year == 0
drop exp_country_year

preserve
/* Capital intensity by SITC product, built from NAICS industry data */
/*Concordance from NAICS to SITC*/
// The concordance file used below was generated once with:
//qui do "~/shared_space/dbaharc/General_Data/CodesConcordances/NAICS-SIC_to_SITC.do" naics 2 sitc2 1 //NAICS 4 digits and SITC 4 digits
//save "${path}data/NAICS4-SITC4.dta", replace
use "${path}data/naics5811.dta", clear
/*Follow Nathan Nunn QJE definitions of k*/
g kp = cap/(pay+cap)
keep naics year kp
// Aggregate to the first four NAICS digits (simple mean within naics4-year).
g naics4 = int(naics/100)
collapse kp, by(naics4 year)
ren naics4 naics
// Attach every SITC code linked to each NAICS code; keep matched pairs only.
joinby naics using "${path}data/NAICS4-SITC4.dta", unmatched(master)
keep if _merge==3
/*Renormalizing weights for those that are not merged perfectly: SITC that has less NAICS matched*/
// total() is the current name of egen's sum(); the result is identical.
egen temp = total(share_sitc2), by(sitc2 year)
g weight = share_sitc2 / temp
keep naics kp sitc2 weight year
replace kp = kp * weight
// NOTE(review): kp now holds weight-multiplied values whose weights sum to 1
// within each sitc2-year, so a weighted average would be their SUM within
// sitc2-year. (mean) additionally divides by the number of NAICS-year rows.
// Confirm this is intended before changing it, since it alters kp.
collapse (mean) kp, by(sitc2)
drop if mi(sitc2)
label var kp "Capital Intensity"
// center is a user-written command; TODO confirm that option s standardizes.
center kp, s
// Quartile indicator of kp.
xtile q_kp=kp, n(4)
// Use the full variable name: "sitc" only worked through variable-name
// abbreviation and fails under -set varabbrev off-.
ren sitc2 product
tempfile intensities
save `intensities'

/*Human Capital Intensity*/
// ${path} already ends with "/", so no extra separator is added here; this
// keeps the file name consistent with every other path in the script.
use "${path}data/SITC4_HumanCapitalIntensity.dta", clear
// center is a user-written command; TODO confirm that option s standardizes.
center HCI, s
// Quartile indicator of HCI.
xtile q_HCI=HCI, n(4)
tempfile hci
save `hci'

/*Bahar 2019 KI + R&D measures, industry level*/
/*Concordance from NAICS to SITC*/
use "${path}data/NAICS_KI.dta", clear
ren ONETcumexp_wage ONETki
// NOTE(review): the capital-intensity step uses int(naics/100); here the
// division is not truncated, so any code that is not a multiple of 100 becomes
// non-integer and cannot match the concordance. Confirm the codes in
// NAICS_KI.dta all end in 00.
replace naics = naics/100
joinby naics using "${path}data/NAICS4-SITC4.dta", unmatched(master)
keep if _merge==3
/*Renormalizing weights for those that are not merged perfectly: SITC that has less NAICS matched*/
// total() is the current name of egen's sum(); the result is identical.
egen temp = total(share_sitc2), by(sitc2)
g weight = share_sitc2 / temp
keep naics ONETki sitc2 weight
replace ONETki = ONETki * weight
// NOTE(review): the weights sum to 1 within sitc2, so a weighted average
// would be the SUM of these products; (mean) divides again by the number of
// matched NAICS rows. Confirm this is intended before changing it.
collapse (mean) ONETki, by(sitc2)
drop if mi(sitc2)
label var ONETki "Knowledge Intensity (ONET)"
// center is a user-written command; TODO confirm that option s standardizes.
center ONETki, s
// Use the full variable name: "sitc" only worked through variable-name
// abbreviation and fails under -set varabbrev off-.
ren sitc2 product
tempfile onetki
save `onetki'

/* R&D intensity (Orbis and Compustat) mapped from NAICS to SITC */
// Build the concordance by running the project do-file and store whatever it
// leaves in memory as tempfile naics6.
// NOTE(review): the original trailing comment said "NAICS 4 digits and SITC 4
// digits", but the 4-digit concordance elsewhere in this file was generated
// with arguments "naics 2 sitc2 1" while this call passes "naics 0 sitc2 1".
// Confirm the aggregation level this call produces.
qui do "${path}do_files/NAICS-SIC_to_SITC.do" naics 0 sitc2 1
tempfile naics6
save `naics6'
use "${path}data/RDOrbisNT.dta", clear
keep if year==2000
keep naics randd_intensity
ren randd_intensity rd_orbis
destring naics, replace

// Keep industries present in either source.
merge 1:1 naics using "${path}data/RDCompustatKY.dta", keep(master matched using) nogen
ren rdint rd_compustat
drop sales rde
// An industry missing from one source gets intensity 0 for that source.
foreach v in rd_orbis rd_compustat {
    replace `v'=0  if mi(`v')
}
joinby naics using `naics6', unmatched(master)
keep if _merge==3
/*Renormalizing weights for those that are not merged perfectly: SITC that has less NAICS matched*/
// total() is the current name of egen's sum(); the result is identical.
egen temp = total(share_sitc2), by(sitc2)
g weight = share_sitc2 / temp
keep naics rd_* sitc2 weight 
replace rd_orbis = rd_orbis * weight
replace rd_compustat = rd_compustat * weight
// NOTE(review): the weights sum to 1 within sitc2, so a weighted average
// would be the SUM of these products; (mean) divides again by the number of
// matched NAICS rows. Confirm this is intended before changing it.
collapse (mean) rd_*, by(sitc2)
drop if mi(sitc2)
// center is a user-written command; TODO confirm that option s standardizes.
center rd_*, s
// Use the full variable name: "sitc" only worked through variable-name
// abbreviation and fails under -set varabbrev off-.
ren sitc2 product
tempfile rdki
save `rdki'
// Back to the trade panel that was preserved before the intensity measures.
restore

/* Antras upstreamness measure, centred and stored for the product-level merge */
tempfile upstreamness
preserve
    use "${path}data/UpstreamnessSITC.dta", clear
    // center is a user-written command; TODO confirm that option s standardizes.
    center upstreamness, s
    save `upstreamness'
restore

/* Costinot et al. (2012) measures, one row per country-product-year */
merge 1:1 iso3 product year using "${path}data/CDKasinh.dta", ///
    keep(master matched) nogenerate

/**********************/
/*DIFF IN DIFF DATASET*/
/**********************/
// Product-level characteristics built above, attached one tempfile at a time.
foreach lookup in intensities hci onetki rdki upstreamness {
    merge m:1 product using ``lookup'', keep(master matched) nogenerate
}

// The Rauch (1999) file is keyed on sitc4, so the key is renamed for the merge
// and renamed back afterwards.
rename product sitc4
merge m:1 sitc4 using "${path}data/Rauch1999-DifVsHom.dta", ///
    keep(master matched) nogenerate
rename con h_con
rename lib h_lib
rename sitc4 product


/* Treatment variables (with occupation characteristics), keyed on iso3 */
tempfile treatment
preserve
    use "${path}data/data_mig_fyug_rm4_v300.dta", clear
    // Disabled filter kept for reference:
    //keep if nation=="FYUG"
    rename nation iso3
    replace iso3 = "YUG" if iso3 == "FYUG"
    save `treatment'
restore

/*
preserve
use "${path}data/data_mig_fyug_rm4_v205.dta", clear //Adding treatments with occupation characteristics
//keep if nation=="FYUG"
replace nation = "YUG" if nation=="FYUG"
ren nation iso3
tempfile treatment
save `treatment'
restore

preserve
use "${path}data/data_mig_fyug_rm4_v205_wgr.dta", clear //Adding treatments with wage growth
cap drop mig_rm_ls00_edu_* mig_rm_ls00_bf2_* mig_rm_ls00_svam_* mig_rm_ls00_sv50_* mig_rm_ls00_svct_*
replace nation = "YUG" if nation=="FYUG"
ren nation iso3
tempfile wagegrowth
save `wagegrowth'
restore

preserve
use "${path}data/data_mig_fyug_rm4_v205_rX.dta", clear //Adding treatments with different return years
replace nation = "YUG" if nation=="FYUG"
ren nation iso3
tempfile diffreturns
save `diffreturns'
restore

preserve
use "${path}data/data_mig_fyug_rm4_v205_dur.dta", clear //Adding treatments weighted by duration in the sample
replace nation = "YUG" if nation=="FYUG"
ren nation iso3
tempfile durationW
save `durationW'
restore
*/

// Disabled restriction kept for reference:
//keep if iso3=="YUG"

/* Long panel with treatments attached (saved as templong) */
// No preserve/restore is used around this step: the long panel is reloaded
// from templong below, so restoring the pre-merge data would only cost an
// extra copy of the dataset that is discarded immediately.
ren product sitc2
/*New treatment using sum of migrants between 1990 and 2000 who left by 2000 (3/14/2018)*/
merge m:1 iso3 sitc2 using `treatment', keep(master matched) nogen
// Alternative treatment files, currently disabled:
//merge m:1 iso3 sitc2 using `wagegrowth', keep(master matched) nogen
//merge m:1 iso3 sitc2 using `diffreturns', keep(master matched) nogen
//merge m:1 iso3 sitc2 using `durationW', keep(master matched) nogen
ren sitc2 product
tempfile templong
save `templong' 

/* YUG-only wide table: one row per product, one column per variable-year */
keep if iso3=="YUG"
keep product year rca_xdeu exp_xdeu rcadeu expdeu
keep if inlist(year, 1980, 1985, 1990, 1995, 2000, 2005, 2010)
reshape wide rca_xdeu exp_xdeu rcadeu expdeu, i(product) j(year)
tempfile tempwide
save `tempwide'

/* Back to the long panel; the YUG wide values are attached to every country */
use `templong', clear
merge m:1 product using `tempwide',keep(master matched) nogen
merge m:1 product using `YUGmighatwide',keep(master matched) nogen
// Disabled country-specific version of the next merge:
//merge 1:1 iso3 product year using `YUGmighat',keep(master matched) nogen
merge m:1 product year using `YUGmighat',keep(master matched) nogen

// Unmatched or undefined migration stocks and shares are set to 0.
qui ds mig* share*, v(32)
// Copy the macro directly instead of evaluating it as a string expression.
local miglist `r(varlist)'
foreach m of local miglist {
    qui replace `m' = 0 if mi(`m')
    //qui g ln`m' = asinh(`m')
    //qui g ln`m' = log(`m')
}

/* Attach the YUG FDI series by 2-digit product and year */
generate product_2d = floor(product/100)
// Disabled country-specific version:
//merge m:1 iso3 product_2d year using `fdicentralbank', keep(master matched) nogen
merge m:1 product_2d year using `fdicentralbank', keep(master matched) nogenerate

// Cells without FDI information are set to 0 in every fdi* variable.
foreach fdivar of varlist fdi* {
    replace `fdivar' = 0 if missing(`fdivar')
}


/**********************************************/
/*Add GDP and Pop from WDI & Maddison dataset */
/**********************************************/
/*
use ~/shared_space/dbaharc/General_Data/WDI/WDIv2017.dta , clear
g yug = countrycode=="SCG" | countrycode=="SRB" | countrycode=="KOS" | countrycode=="HRV" ///
       | countrycode=="SVN" | countrycode=="MNE" | countrycode=="MKD" | countrycode=="BIH" ///
       | countrycode=="YUG"
replace countrycode="YUG" if yug
keep countrycode year sp_pop_totl sl_tlf_totl_in
collapse (sum) sp_pop_totl sl_tlf_totl_in, by(countrycode year)
replace sl_tlf_totl_in=. if sl_tlf_totl_in==0
ren countrycode iso3
*/

/* GDP per capita and population in 1990 and 2000, one row per country */
tempfile wdigdp
preserve
    use countrycode cgdppc rgdpnapc pop year if inlist(year, 1990, 2000) ///
        using "${path}data/mpd2018.dta", clear
    rename countrycode iso3
    //g n = _n
    reshape wide cgdppc rgdpnapc pop, i(iso3) j(year)
    save `wdigdp'
restore
merge m:1 iso3 using `wdigdp', keep(master matched) nogenerate


/* Yearly population from the same source, matched on country and year */
tempfile mdpop
preserve
    use countrycode pop year using "${path}data/mpd2018.dta", clear
    rename countrycode iso3
    save `mdpop'
restore

merge m:1 iso3 year using `mdpop', keep(master matched) nogenerate


/**************************/
/* Add geographic regions */
/**************************/
// The regions file is keyed on wbcode, so iso3 is renamed only for the merge.
rename iso3 wbcode
merge m:1 wbcode using "${path}data/World Bank Regions and Continents.dta", ///
    keep(master matched) nogenerate
rename wbcode iso3
drop region

/***********************/
/*Add Factor Endowments*/
/***********************/
// The endowments file only covers 1990 and 2000, hence the small match rate.
merge m:1 product year using "${path}data/SITC_FactorEndowments.dta", ///
    keep(master matched) nogenerate

/***************************************************************************************************/
/* Compute German imports of intermediate goods for each SITC4 code, using US IO Tables (BEA 2002) */
/***************************************************************************************************/
preserve
/* Using concordance based on Cunat and Melitz to convert from SIC to SITC (check footnote 24 of this paper
http://scholar.harvard.edu/files/melitz/files/cunat_and_melitz_-_2012_-_volatility_labor_market_flexibility_and_the_patt.pdf
Yildirim wrote a code that uses years 2000-2006 from Feenstra's data to create the concordance with proper weights
*/
// The concordance file used below was generated once with:
//qui do "~/shared_space/dbaharc/General_Data/CodesConcordances/NAICS-SIC_to_SITC.do" naics 2 sitc2 1 //NAICS 4 digits and SITC 4 digits
//save "${path}data/NAICS4-SITC4.dta", replace

/*I first work the I-O table (2002) in NAICS and convert it to SITC. Original data is from BEA Website*/
// String copy of the concordance key.
// NOTE(review): presumably the IO table stores NAICS codes as strings; confirm.
use "${path}data/NAICS4-SITC4.dta", clear
g naicsstr = string(naics)
drop naics
ren naicsstr naics
tempfile naics_sitc
save `naics_sitc'

use "${path}data/IO2002BEA_NAICS.dta", clear
drop if naics_input==naics_output //Eliminate numbers in diagonal

// Map the input industry to SITC.
// Full variable name sitc2 is used: "sitc" only worked through variable-name
// abbreviation and fails under -set varabbrev off-.
ren naics_input naics
joinby naics using `naics_sitc', unmatched(both)
ren (sitc2 share_sitc2) (SITC_input share_SITC_input)
drop naics _merge cvalue total_sitc2 total_naics share_naics

// Map the output industry to SITC in the same way.
ren naics_output naics
joinby naics using `naics_sitc', unmatched(both)
ren (sitc2 share_sitc2) (SITC_output share_SITC_output)
drop naics _merge cvalue total_sitc2 total_naics share_naics

// Keep fully mapped, off-diagonal SITC pairs.
drop if mi(SITC_output) | mi(SITC_input)
drop if SITC_input==SITC_output
// Weighted average of the NAICS-level coefficients within each SITC pair:
// weights are the product of both concordance shares, normalised to sum to 1.
gen weight =  share_SITC_input *  share_SITC_output
// total() is the current name of egen's sum(); the result is identical.
egen sum_weight = total(weight), by(SITC_output SITC_input)
replace iocoeff=iocoeff*weight/sum_weight 
collapse (sum) iocoeff, by(SITC_input SITC_output)

/*Changing units of measurement to dollars of input to 1 dollar of output*/
// After this step the coefficients of each SITC_output sum to 1.
bys SITC_output: egen total_iocoeff = total(iocoeff)
replace iocoeff = iocoeff / total_iocoeff
drop total_iocoeff
drop if iocoeff == 0
// Written to the project data folder (not a tempfile) and read back below.
save "${path}data/SITC4_IOTable.dta", replace
restore

/* For each SITC product, imports of its intermediate inputs from Germany,
   weighting the imports of every input by its IO coefficient */
tempfile ioimpdeu
preserve
    // For computational purposes this is done only for YUG.
    keep iso3 product impdeu year
    keep if iso3 == "YUG"
    // Each imported product is treated as an input and expanded to all the
    // outputs that use it.
    rename product SITC_input
    joinby SITC_input using "${path}data/SITC4_IOTable.dta", unmatched(master)
    drop _merge
    generate ioimpdeu = impdeu * iocoeff
    collapse (sum) ioimpdeu, by(iso3 SITC_output year)
    rename SITC_output product
    save `ioimpdeu'
restore

merge 1:1 iso3 product year using `ioimpdeu', ///
    keepusing(ioimpdeu) keep(master matched) nogenerate


/*******************************/
/*** Compute Exports per cap ***/
/*******************************/
// exppc = exp/pop and exppc_xdeu = exp_xdeu/pop.
foreach flow in exp exp_xdeu {
    local percap : subinstr local flow "exp" "exppc"
    generate `percap' = `flow' / pop
}
// NOTE(review): both variables carry the same label text; consider
// distinguishing the _xdeu version once its definition is confirmed.
foreach percap in exppc exppc_xdeu {
    label variable `percap' "Exports per 1000 people"
}

/**********************/
/*** Compute RCApop ***/
/**********************/
// Product-year export total across all countries.
egen totexp_py = total(exp), by(product year)
// Yearly population total, counting each country-year once: pop is kept only
// on the single row that tag() marks per country-year.
egen t = tag(iso3 year)
generate temp = pop * t
egen totpop_y = total(temp), by(year)

// Ratio of own exports per capita to product-level exports per capita.
// NOTE(review): the _xdeu version uses the same denominator, which is built
// from exp rather than exp_xdeu; confirm this is intended.
foreach flow in exp exp_xdeu {
    local rcaname : subinstr local flow "exp" "rcapop"
    generate `rcaname' = (`flow'/pop)/(totexp_py/totpop_y)
}

drop totexp_py t temp totpop_y

/* Transformed outcome variables used throughout the analysis */
// For exports (exp) and exports per capita (exppc), both taken from the
// _xdeu series: log, log(1 + x) and inverse hyperbolic sine.
foreach stub in exp exppc {
    generate ln`stub' = log(`stub'_xdeu)
    generate ln`stub'plus1 = log(`stub'_xdeu+1)
    generate asinh`stub' = asinh(`stub'_xdeu)

}
// Despite its name, lnfdi is the inverse hyperbolic sine of fdi.
generate lnfdi = asinh(fdi)

// Product-year exports summed over all countries, and each observation's
// share of that total.
egen globalexp = total(exp), by(product year)
generate lnglobalexp = asinh(globalexp)
generate globalexpshare = exp_xdeu/globalexp

/* 1990 baseline of exports to Germany, copied to every year of the same
   country-product */
// Share of each product in the country's total expdeu in that year.
egen totalexpdeu = total(expdeu), by(iso3 year)
g germanexpshare = expdeu/totalexpdeu

// The 1990 value is isolated in temp and spread with max(). The by-group has
// to be country-product: the panel holds many countries, so grouping by
// product alone would return the largest 1990 value across all countries
// instead of the observation's own baseline.
g temp = germanexpshare if year==1990
egen germanexpshare1990 = max(temp), by(iso3 product)
drop temp

// Same construction for the inverse hyperbolic sine of expdeu in 1990.
g temp = asinh(expdeu) if year==1990
egen germanexp1990 = max(temp), by(iso3 product)
drop temp

// Indicator for rcadeu1995 >= 1 (rcadeu1995 comes from the YUG-only wide table
// merged by product earlier in this file).
// Stata treats missing values as larger than any number, so without the
// qualifier every product lacking a 1995 value would be coded 1. Those
// products are left missing instead; recode to 0 afterwards if "no measured
// value" should count as "no comparative advantage".
g Rdeu1995 = rcadeu1995 >= 1 if !mi(rcadeu1995)

// Final analysis dataset.
save "${path}data/GermanYUGMigration.dta", replace

